* This file creates descriptive graphs of the age at higher-degree graduation, using the data available in the 1970 census.


* Load the ready-made census extract, reading only the variables listed here.
use shnro ktutk vuosi saikavv ika syntyv using "D:\ready-made\FOLK_vl_7085\vl7085_1.dta", clear

* Restrict the data to the 1970 census year.
drop if vuosi != 1970

* Age in 1944, derived from syntyv (presumably the birth year - confirm against the codebook).
generate age44 = 1944 - syntyv
tabulate age44

* Keep ages 5-45 in 1944; a missing age44 fails the upper bound and is dropped as well.
keep if age44 < 46 & age44 > 4

* Drop the education code 999999, then store the first character of ktutk as a number.
drop if ktutk == "999999"
generate ktutk1 = substr(ktutk, 1, 1)
destring ktutk1, replace

* saikavv is converted to numeric and the code 9999 is recoded to missing.
destring saikavv, replace
replace saikavv = . if saikavv == 9999

* Graduation age: saikavv minus syntyv (missing whenever saikavv is missing).
generate graduated = saikavv - syntyv

* Indicator equal to 1 when saikavv is recorded and 0 otherwise.
generate grad_y = !(saikavv == .)

* Kernel densities of graduation age for higher degrees (ktutk1 equal to 6 or 7):
* all retained cohorts versus the narrower 1944 cohort.
*
* The means and medians drawn on the graph are taken from the stored results of
* -summarize- instead of being typed in by hand, and the plotted cohort uses the
* same filter (age44>25&age44<31) as the summary it is labelled with. Previously
* the density was drawn with age44<30 while the label came from the age44<31
* summary, so the dashed mean line and the text disagreed.
*
* NOTE(review): the legend says "Cohorts 25-30" but the filter keeps ages 26-30;
* confirm which of the two is intended.

* Summary for all cohorts (navy line and label).
sum graduated if ktutk1>5&ktutk1<8, d
local mean_all = r(mean)
local med_all = r(p50)
local mean_all_lbl = trim(string(r(mean), "%5.2f"))

* Summary for the narrow cohort (maroon line and label).
sum graduated if ktutk1>5&ktutk1<8&age44>25&age44<31, d
local mean_coh = r(mean)
local med_coh = r(p50)
local mean_coh_lbl = trim(string(r(mean), "%5.2f"))

* Dashed vertical lines mark the means, solid vertical lines mark the medians.
tw (kdensity graduated if ktutk1>5&ktutk1<8) ///
   (kdensity graduated if ktutk1>5&ktutk1<8&age44>25&age44<31), ///
   text(0.13 38 "Mean `mean_coh_lbl' (Median `med_coh')", color(maroon)) ///
   text(0.05 41 "Mean `mean_all_lbl' (Median `med_all')", color(navy)) ///
   graphregion(lcolor(white) fcolor(white)) ///
   title(Graduation age with higher degree) ytitle(Density) xtitle(Graduation age) ///
   legend(label(1 "Cohorts 5-45 in 1944") label(2 "Cohorts 25-30 in 1944")) ///
   xline(`mean_all', lcolor(navy) lpattern(dash)) ///
   xline(`mean_coh', lcolor(maroon) lpattern(dash)) ///
   xline(`med_coh', lcolor(maroon)) ///
   xline(`med_all', lcolor(navy))
graph save "$temp\graphs\grad_age_histograms_all_density", replace
graph export "$temp\graphs\grad_age_histograms_all_density.pdf", replace


* Same density graph without outliers: only graduation ages of 40 or below are used.
*
* As in any hand-annotated figure, typed-in statistics drift from the data: the
* label and dashed line used 27.9 while the matching summary gave 27.8, because
* the density was drawn with age44<30 and the summary with age44<31. Both now use
* age44>25&age44<31 and the annotations come from the stored -summarize- results.
*
* NOTE(review): the legend says "Cohorts 25-30" but the filter keeps ages 26-30;
* confirm which of the two is intended.

* Summary for all cohorts, outliers excluded (navy line and label).
sum graduated if ktutk1>5&ktutk1<8&graduated<41, d
local mean_all = r(mean)
local med_all = r(p50)
local mean_all_lbl = trim(string(r(mean), "%5.2f"))

* Summary for the narrow cohort, outliers excluded (maroon line and label).
sum graduated if ktutk1>5&ktutk1<8&age44>25&age44<31&graduated<41, d
local mean_coh = r(mean)
local med_coh = r(p50)
local mean_coh_lbl = trim(string(r(mean), "%5.2f"))

* Dashed vertical lines mark the means, solid vertical lines mark the medians.
tw (kdensity graduated if ktutk1>5&ktutk1<8&graduated<41) ///
   (kdensity graduated if ktutk1>5&ktutk1<8&age44>25&age44<31&graduated<41), ///
   text(0.13 34 "Mean `mean_coh_lbl' (Median `med_coh')", color(maroon)) ///
   text(0.05 37 "Mean `mean_all_lbl' (Median `med_all')", color(navy)) ///
   graphregion(lcolor(white) fcolor(white)) ///
   title(Graduation age with higher degree) ytitle(Density) xtitle(Graduation age) ///
   legend(label(1 "Cohorts 5-45 in 1944") label(2 "Cohorts 25-30 in 1944")) ///
   xline(`mean_all', lcolor(navy) lpattern(dash)) ///
   xline(`mean_coh', lcolor(maroon) lpattern(dash)) ///
   xline(`med_coh', lcolor(maroon)) ///
   xline(`med_all', lcolor(navy))
* The graph was previously saved twice in a row; once is enough.
graph save "$temp\graphs\grad_age_histograms_all_density_outliers", replace
graph export "$temp\graphs\grad_age_histograms_all_density_outliers.pdf", replace


* Cumulative distribution of graduation age for higher degrees (ktutk1 equal to 6 or 7).
* From here on the data in memory are collapsed to one row per graduation age.

keep if ktutk1>5&ktutk1<8

* ind counts every person; ind2 counts only the narrow cohort and is missing
* elsewhere (collapse (sum) adds missing values as zero).
gen ind=1
gen ind2=1 if age44>25&age44<31

drop if graduated==.

collapse (sum) ind ind2, by(graduated)

* The running sums below accumulate in row order, so make the order explicit.
sort graduated

* All cohorts: share at each graduation age and its running total.
egen total=total(ind)
gen share=ind/total
gen total_share=sum(share)

* Narrow cohort: share at each graduation age and its running total.
egen total2=total(ind2)
gen share2=ind2/total2
gen total_share2=sum(share2)

* Percentage of the narrow cohort graduating at age 30 or later, computed from
* the data instead of being typed into the label.
quietly summarize share2 if graduated>=30
local pct30 = round(100*r(sum))

* NOTE(review): the legend says "Cohorts 25-30" but ind2 covers ages 26-30 in 1944.
tw (line total_share graduated) (line total_share2 graduated, lcolor(maroon)), ///
   ytitle(Cumulative distribution) graphregion(lcolor(white) fcolor(white)) ///
   title(Graduation age with higher degree) xtitle(Graduation age) ///
   legend(label(1 "Cohorts 5-45 in 1944") label(2 "Cohorts 25-30 in 1944")) ///
   xline(29, lcolor(maroon) lpattern(dash)) ///
   text(.6 39 "`pct30'%  graduated age 30 or over ", color(maroon))
graph save "$temp\graphs\grad_age_all_cumu", replace
graph export "$temp\graphs\grad_age_all_cumu.pdf", replace


* Same cumulative distributions without outliers: graduation ages above 40 are removed.

drop if graduated>40

* Discard the shares computed on the untrimmed data and rebuild them.
drop total share total_share total2 share2 total_share2

* The running sums below accumulate in row order, so make the order explicit.
sort graduated

* All cohorts: share at each graduation age and its running total.
egen total=total(ind)
gen share=ind/total
gen total_share=sum(share)

* Narrow cohort: share at each graduation age and its running total.
egen total2=total(ind2)
gen share2=ind2/total2
gen total_share2=sum(share2)

* Percentage of the narrow cohort graduating at age 30 or later in the trimmed
* data, computed instead of being typed into the label.
quietly summarize share2 if graduated>=30
local pct30 = round(100*r(sum))

tw (line total_share graduated) (line total_share2 graduated, lcolor(maroon)), ///
   ytitle(Cumulative distribution) graphregion(lcolor(white) fcolor(white)) ///
   title(Graduation age with higher degree) xtitle(Graduation age) ///
   legend(label(1 "Cohorts 5-45 in 1944") label(2 "Cohorts 25-30 in 1944")) ///
   xline(29, lcolor(maroon) lpattern(dash)) ///
   text(.6 35 "`pct30'%  graduated age 30 or over ", color(maroon))
* NOTE(review): the .gph and .pdf base names differ (..._outliers_cumu versus
* ..._cumu_outliers); left unchanged because other scripts may read either file.
graph save "$temp\graphs\grad_age_all_outliers_cumu", replace
graph export "$temp\graphs\grad_age_all_cumu_outliers.pdf", replace






